/*------------------------------------------------------------------------
Copyright (C) 2002-2016 SIL International. All rights reserved.

Distributable under the terms of either the Common Public License or the
GNU Lesser General Public License, as specified in the LICENSING.txt file.

File: TECkit_Format.h
Responsibility: Jonathan Kew
Last reviewed: Not yet.

Description:
    Definitions used in the TECkit binary table format

        2006-06-02	jk	added support for extended string rules (>255 per initial char)
-------------------------------------------------------------------------*/

#ifndef __TECkit_Format_H__
#define __TECkit_Format_H__

#include "teckit-Common.h"

/* File signatures: four ASCII characters packed big-end-first into 32 bits. */
/* 'qMap' */
#define kMagicNumber 0x714d6170
/* 'zQmp' */
#define kMagicNumberCmp 0x7a516d70

/* File format version numbers. */
/* version before tables with ExtStringRules */
#define kFileVersion2_1 0x00020001
/* current version */
#define kCurrentFileVersion 0x00030000

/* Table version numbers. */
#define kTableVersion2 0x00020000
/* Actually, the engine doesn't check this; it only looks at the file
   version. */
#define kCurrentTableVersion 0x00030000

/* Fixed-size leading part of a name record. */
struct NameRec {
  UInt16 nameID;
  UInt16 nameLength;
  /*
   * Variable-length tail, not declared as a member:
   *     Byte data[nameLength];
   *     pad to 2-byte boundary
   */
};

/* Header at the start of a mapping file. */
struct FileHeader {
  /* magic number = 'qMap' */
  UInt32 type;
  /* version = kCurrentFileVersion */
  UInt32 version;
  /* length of this header including offset arrays and name records */
  UInt32 headerLength;
  /* flags for normalization form, Unicode/byte encoding on LHS of mapping */
  UInt32 formFlagsLHS;
  /* flags for normalization form, Unicode/byte encoding on RHS of mapping */
  UInt32 formFlagsRHS;
  /* number of strings in the names table */
  UInt32 numNames;
  /* number of tables in forward pipeline */
  UInt32 numFwdTables;
  /* number of tables in reverse pipeline */
  UInt32 numRevTables;
};

/*
 * Header of a single table.
 * All "offset from table header" fields are relative to this header.
 */
struct TableHeader {
  /*
   * type = 'B->B', 'B->U', 'U->B', 'U->U'
   * or type = 'NFC ', 'NFD ', and no additional header fields are present
   */
  UInt32 type;
  /* version = kCurrentTableVersion */
  UInt32 version;
  /* total length of this table */
  UInt32 length;
  /*
   * flags:
   *   0x00000001: supplementary-plane Unicode characters supported in
   *               mapping and classes
   *   0x00000002: DBCS support (BB/BU tables only) in lookup table
   */
  UInt32 flags;
  /* offset from table header to page table (Ux tables) or dbcsPage table
     (Bx tables) */
  UInt32 pageBase;
  /* offset from table header to lookup table(s) */
  UInt32 lookupBase;
  /* offset from table header to match class definitions */
  UInt32 matchClassBase;
  /* offset from table header to replacement class definitions */
  UInt32 repClassBase;
  /* offset from table header to string rule lists */
  UInt32 stringListBase;
  /* offset from table header to string rule data */
  UInt32 stringRuleData;
  /* max number of input code units matched by a rule */
  UInt8 maxMatch;
  /* max number of input code units matched by pre-context */
  UInt8 maxPre;
  /* max number of input code units matched by post-context */
  UInt8 maxPost;
  /* max number of output code units generated by a rule */
  UInt8 maxOutput;
  /* default output for unmapped codes */
  UInt32 replacementChar;
};
/* In C the tag alone is not a type name, so supply one; C++ already has it. */
#ifndef __cplusplus
typedef struct TableHeader TableHeader;
#endif

/* Values of TableHeader.type: four ASCII characters packed into 32 bits. */
/* 'B->B' */
#define kTableType_BB 0x422d3e42
/* 'B->U' */
#define kTableType_BU 0x422d3e55
/* 'U->B' */
#define kTableType_UB 0x552d3e42
/* 'U->U' */
#define kTableType_UU 0x552d3e55

/* 'NFC ' */
#define kTableType_NFC 0x4e464320
/* 'NFD ' */
#define kTableType_NFD 0x4e464420

/* Bit values for TableHeader.flags. */
/* supplementary-plane Unicode characters supported in mapping and classes */
#define kTableFlags_Supplementary 0x0001
/* DBCS support (BB/BU tables only) in lookup table */
#define kTableFlags_DBCS 0x0002

/* One lookup table entry; which member applies depends on the table. */
union Lookup {
  /* for any table when string rules are used */
  struct {
    /*
     * 0xff:      use string rules
     * 0xfe:      illegal DBCS trailing byte
     * 0xfd:      unmapped character: copy (BB/UU) or output default (UB/BU)
     * 0x00-0x03: direct lookup
     */
    UInt8 type;
    /* number of rules for this code */
    UInt8 ruleCount;
    /* index into stringList of start of rule list for this code */
    UInt16 ruleIndex;
  } rules;

  /* for UB and BB tables with direct byte output */
  struct {
    /* count of bytes present in data[]: 0-3 */
    UInt8 count;
    UInt8 data[3];
  } bytes;

  /* for BU and UU tables with direct Unicode output: unicode scalar value */
  UInt32 usv;
};
/* In C the tag alone is not a type name, so supply one; C++ already has it. */
#ifndef __cplusplus
typedef union Lookup Lookup;
#endif

/* Special values of Lookup.rules.type. */
/* use string rules */
#define kLookupType_StringRules 0xff
/* illegal DBCS trailing byte */
#define kLookupType_IllegalDBCS 0xfe
/* unmapped character */
#define kLookupType_Unmapped 0xfd

/*
 * NOTE(review): presumably these split Lookup.rules.type into a rule-type
 * field (top two bits) and an extended rule count (low six bits) for the
 * extended string rules format -- confirm against the engine.
 */
#define kLookupType_RuleTypeMask 0xc0
#define kLookupType_ExtStringRules 0x80
#define kLookupType_ExtRuleCountMask 0x3f

/*
   /rules.ruleIndex/ is the index into stringList of the start of an array of
   /rules.ruleCount/ UInt32 values, which are the offsets from stringRuleData
   to each rule to test for this character
*/

/* Fixed header of a string rule: four one-byte element counts. */
struct StringRule {
  /* length of match string in matchElements */
  UInt8 matchLength;
  /* length of post-context in matchElements */
  UInt8 postLength;
  /* length of pre-context in matchElements */
  UInt8 preLength;
  /* length of replacement string in repElements */
  UInt8 repLength;
};
/* In C the tag alone is not a type name, so supply one; C++ already has it. */
#ifndef __cplusplus
typedef struct StringRule StringRule;
#endif

/*
 * One element of a rule's match string.
 * The flags view and the value views overlay the same storage.
 */
union MatchElem {
#ifdef __cplusplus
  /* C++ only: empty user-provided default constructor. */
  MatchElem() {}
#endif

  struct {
    /* repeat count: (min << 4) + max */
    UInt8 repeat;
    /*
     * 0x80: negate flag (not allowed with group)
     * 0x40: non-literal flag--if set, bits 0x3f indicate specific type
     *       (value must not be zero)
     * Note that if 'non-literal' flag is NOT set, remaining bits are not
     * used as type code but are part of a USV value (or must be set to zero
     * for literal byte data).
     */
    UInt8 type;
    UInt16 reserved;
  } flags;

  union {
    struct {
      UInt16 reserved;
      /* offset to following OR or EGroup element */
      UInt8 dNext;
      /* offset to element after the group for BGroup */
      UInt8 dAfter;
    } bgroup;

    /* (also used for OR elements) */
    struct {
      UInt16 reserved;
      /* offset to following OR or EGroup element (for OR only) */
      UInt8 dNext;
      /* reverse offset to corresponding BGroup */
      UInt8 dStart;
    } egroup;

    struct {
      UInt16 reserved;
      /* index of character class */
      UInt16 index;
    } cls;

    struct {
      UInt8 reserved[3];
      /* literal byte */
      UInt8 data;
    } byte;

    struct {
      /* literal Unicode scalar: must mask with kUSVMask, as top bits overlap
         flags.repeat and "negate" bit in flags.type */
      UInt32 data;
    } usv;
  } value;
};
/* In C the tag alone is not a type name, so supply one; C++ already has it. */
#ifndef __cplusplus
typedef union MatchElem MatchElem;
#endif

/* Bits of MatchElem.flags.type. */
/* negated test */
#define kMatchElem_Negate 0x80
/* test value is not a literal character; need to check type */
#define kMatchElem_NonLit 0x40

/* Mask for type value. Note that type 0 must not be used (=literal) */
#define kMatchElem_TypeMask 0x3f
/* class match */
#define kMatchElem_Type_Class 0x01
/* begin group */
#define kMatchElem_Type_BGroup 0x02
/* end group */
#define kMatchElem_Type_EGroup 0x03
/* special code: OR */
#define kMatchElem_Type_OR 0x04
/* special code: ANY */
#define kMatchElem_Type_ANY 0x05
/* special code: EOS */
#define kMatchElem_Type_EOS 0x06
/* copy matched item (invalid; for internal compiler use) */
#define kMatchElem_Type_Copy 0x07

/* Low 21 bits: isolates a literal Unicode scalar value from the flag bits
   that share its 32-bit word. */
#define kUSVMask 0x001fffff

/*
 * One element of a rule's replacement string.
 * The flags view and the literal value overlay the same 32 bits.
 */
union RepElem {
  struct {
    /* see kRepElem_... below */
    UInt8 type;
    /* index of corresponding item in matchString for type == kRepElem_Class
       or kRepElem_Copy */
    UInt8 matchIndex;
    /* repClass if type == kRepElem_Class */
    UInt16 repClass;
  } flags;

  /* literal value (mask with kUSVMask) if flags.type == kRepElem_Literal */
  UInt32 value;
};
/* In C the tag alone is not a type name, so supply one; C++ already has it. */
#ifndef __cplusplus
typedef union RepElem RepElem;
#endif

/* Values of RepElem.flags.type. */
#define kRepElem_Literal 0x00
/* Class and Copy reuse the corresponding match element type codes. */
#define kRepElem_Class kMatchElem_Type_Class
#define kRepElem_Copy kMatchElem_Type_Copy
/* used in default terminator rules */
#define kRepElem_Unmapped 0x0f

#endif /* __TECkit_Format_H__ */
